import scrapy
from scrapy_fiction.items import ScrapyFictionItem

class FictionSpiderSpider(scrapy.Spider):
    """Crawl a novel chapter-by-chapter from shuquge.com.

    Starts at one chapter page, yields an item per chapter, and follows the
    "next chapter" link until it points back to the book index.
    """
    name = 'fiction_spider'
    allowed_domains = ['www.shuquge.com']
    start_urls = ['http://www.shuquge.com/txt/100/69717.html']

    def parse(self, response):
        """Extract one chapter's title and body text, then follow pagination.

        :param response: chapter page response
        :yields: a ``ScrapyFictionItem`` for this chapter, then (unless the
            last chapter was reached) a ``scrapy.Request`` for the next one
        """
        fiction_item = ScrapyFictionItem()
        # extract_first() yields a single string (or None); extract() would
        # store a one-element list in the item field.
        fiction_item['title'] = response.xpath('//h1/text()').extract_first()

        content_html = response.xpath('//div[@class="showtxt"]/text()').extract()
        # Per text node: collapse runs of whitespace, rejoining with "\r"
        # (preserves the site-specific cleanup), then terminate each node
        # with "\n". str.join avoids quadratic += concatenation.
        content_str = "".join(
            "\r".join(node.split()) + "\n" for node in content_html
        )
        fiction_item['content'] = content_str
        yield fiction_item

        # The third <li> of the pagination block holds the "next" link.
        next_link = response.xpath(
            '//div[@class="page_chapter"]/ul/li[3]/a/@href'
        ).extract_first()
        if not next_link:
            # Guard: original code did next_link[0] and crashed with
            # IndexError when the xpath matched nothing.
            self.logger.warning('next-chapter link not found on %s', response.url)
            return
        if 'index' in next_link:
            # A link back to the book index marks the final chapter.
            self.logger.info('爬取完成')
        else:
            self.logger.info('爬取下一章')
            # urljoin resolves the relative href against the current page —
            # equivalent to the old hard-coded base URL but robust to
            # different chapter directories.
            yield scrapy.Request(response.urljoin(next_link), callback=self.parse)